# *****************************************************************
# OVERVIEW ####
# *****************************************************************
# Filename: MoralNatConv_Code.R
# 
# Replication Code for LIWC Analysis in
#   "From Barack Obama to Donald Trump: The Evolution 
#   of Moral Appeals in National Conventions"
#
# Includes:
#  - Replication code for LIWC Analysis in Main Paper
#  - Replication code for STM discussed in Main Paper
#
# Data: National Convention Speeches from 2008 to 2020
#       For Democrat and Republican Parties
#
# Author: Jennifer Lin
# Updated: 2024 02 01


# *****************************************************************
# PACKAGES AND FUNCTIONS ####
# *****************************************************************

# This section provides information on installing the packages
#   that will be used throughout this script.
# If the package exists, the code will not reinstall it. Packages
#   come from both GitHub and CRAN

# Start with the CRAN Packages
# Note: This installs the full tidyverse but the code
#   only loads specific packages. If desired, install
#   dplyr and ggplot2 only
cran_pkgs <- c(
  "tidyverse", "quanteda", "psych", "effsize",
  "devtools", "stm"
)

# Install every CRAN package that is not yet present. The list of
#   installed packages is re-read on each pass through the loop.
for (pkg in cran_pkgs) {
  is_installed <- pkg %in% rownames(installed.packages())
  if (!is_installed) {
    install.packages(pkg)
  }
}

# quanteda.dictionaries is hosted on GitHub and is not available
#  on CRAN. Therefore, install it using devtools
github_pkgs <- c(
  "kbenoit/quanteda.dictionaries"
)

# Install each GitHub package only when it is missing.
# The entries are "owner/repo" slugs, whereas installed.packages()
#   lists bare package names, so the check compares the repo part
#   (basename) rather than the full slug; otherwise the package
#   would be reinstalled on every run.
# NOTE(review): this assumes the repository name equals the package
#   name, which holds for quanteda.dictionaries.
for (repo in github_pkgs) {
  if (!(basename(repo) %in% rownames(installed.packages()))) {
    devtools::install_github(repo)
  }
}

# The following set loads the packages
library(dplyr)                    # For %>%, filter(), select()
library(ggplot2)                  # For ggplot()
library(quanteda)                 # For corpus() and related
library(quanteda.dictionaries)    # For liwcalike() and MFD*
library(psych)                    # For describeBy()
library(effsize)                  # For cohen.d()
library(stm)                      # For STM functions

# *MFD stands for Moral Foundations Dictionary (2.0)
#   by Frimer et al, 2019

# Since there is some randomization in the selection
#   of cases, I implement a set.seed()
# NOTE(review): the only random draws visible in this script are the
#   optional sample_n() calls under "Compare Random Samples", which
#   are commented out by default; presumably the seed only matters
#   when those lines are enabled -- confirm.
set.seed(20)

# *****************************************************************
# DATA ####
# *****************************************************************

# Prior to loading the data, collect the file names so that the
#   files can be loaded in a loop rather than individually.
data_files <- dir(
  here::here("data/"),
  pattern    = ".+\\.rda$",  # Escaped dot: only a literal ".rda" extension
  full.names = TRUE,         # List Full File Path Names
  recursive  = TRUE)         # Search sub-folders as well

# Stop with a clear message when nothing was found, instead of
#   failing later on a missing object.
if (length(data_files) == 0L) {
  stop(
    "No .rda files found under ", here::here("data/"),
    call. = FALSE
  )
}

# Load each data file. load() is called at top level, so the stored
#   objects are created in the current workspace.
for (i in seq_along(data_files)) {
  load(data_files[i])
}

# This should provide all of the data for each of the
#   national conventions for Democrats and Republicans 
#   from 2008 to 2020.

# *****************************************************************
# 2008 ####
# *****************************************************************

# *** The analyses for each of the years are the same. For purposes
#     of discussion and code comments, I will only comment the 
#     code for 2008 in detail. All other years repeat this
#     process *** 

# *** Format: All years start with the Democrats before the
#     Republicans. Like before, all carry the same format ***

# Democrat Party Analyses ~~~~~
# Score the 2008 DNC corpus with the Moral Foundations Dictionary,
#   copy the `docname` column produced by liwcalike() into a
#   `Filename` column, and use that column as the key for attaching
#   the 2008 convention metadata.
DNC <- liwcalike(dnc08, dictionary = data_dictionary_MFD)
DNC[["Filename"]] <- DNC[["docname"]]
DNC <- merge(x = DNC, y = dnc08_meta, by = "Filename")
# View the output, as desired
head(DNC)

# Republican Party Analyses ~~~~~
# Same three steps for the 2008 RNC corpus and its metadata.
RNC <- liwcalike(rnc08, dictionary = data_dictionary_MFD)
RNC[["Filename"]] <- RNC[["docname"]]
RNC <- merge(x = RNC, y = rnc08_meta, by = "Filename")
# View the output, as desired
head(RNC)

# Compare Random Samples ~~~~~
# If desired, uncomment the following to draw a random
#   sample of the Democrat texts that is the same size as
#   the number of texts for Republicans
# DNC <- sample_n(DNC, nrow(RNC))

# Prepare Results for Analysis ~~~~~
# Stack the RNC and DNC results into one data frame, keep only the
#   rows whose Type is "Speech", and add one composite score per
#   foundation: the sum of its virtue and vice columns, i.e. the
#   total appeals to that foundation.
speech <- rbind(RNC, DNC) %>%
  filter(Type == "Speech") %>%
  mutate(
    Harm      = care.virtue + care.vice,
    Fairness  = fairness.virtue + fairness.vice,
    Ingroup   = loyalty.virtue + loyalty.vice,
    Authority = authority.virtue + authority.vice,
    Purity    = sanctity.virtue + sanctity.vice
  )

# Validation Selection ~~~~~
# To validate the results, pick, within each convention, the 5
#   speeches with the highest score on each foundation.
by_convention <- group_by(speech, Convention)

# 1. Harm
top5_Harm      <- slice_max(by_convention, order_by = Harm, n = 5)
# 2. Fairness
top5_Fairness  <- slice_max(by_convention, order_by = Fairness, n = 5)
# 3. Ingroup
top5_Ingroup   <- slice_max(by_convention, order_by = Ingroup, n = 5)
# 4. Authority
top5_Authority <- slice_max(by_convention, order_by = Authority, n = 5)
# 5. Purity
top5_Purity    <- slice_max(by_convention, order_by = Purity, n = 5)

# Analyze Results ~~~~~

# Five party comparisons are run below (one per foundation), so a
#   Bonferroni correction for multiple comparisons is applied at the
#   end. This vector stores the five p-values until then.
p <- numeric(5L)

# Each foundation follows the same four steps, described in detail
#   for Harm and repeated unchanged for the other foundations.

# 1. Harm
# a. Summary statistics of the Harm score by Convention, using
#    psych::describeBy()
describeBy(speech$Harm, speech$Convention)
# b. t-test comparing the Harm score between conventions
Harm_Test <- t.test(speech$Harm ~ speech$Convention)
#    View the results
Harm_Test
# c. Effect size (Cohen's d). Both psych and effsize export a
#    cohen.d(); the call is namespace-qualified so that effsize's
#    version is used regardless of the order the packages were
#    attached in.
effsize::cohen.d(speech$Harm, speech$Convention)
# d. Store the p-value from the t-test for the correction below
p[1] <- Harm_Test$p.value

# 2. Fairness
describeBy(speech$Fairness, speech$Convention)
Fairness_Test <- t.test(speech$Fairness ~ speech$Convention)
Fairness_Test
effsize::cohen.d(speech$Fairness, speech$Convention)
p[2] <- Fairness_Test$p.value

# 3. Ingroup
describeBy(speech$Ingroup, speech$Convention)
Ingroup_Test <- t.test(speech$Ingroup ~ speech$Convention)
Ingroup_Test
effsize::cohen.d(speech$Ingroup, speech$Convention)
p[3] <- Ingroup_Test$p.value

# 4. Authority
describeBy(speech$Authority, speech$Convention)
Authority_Test <- t.test(speech$Authority ~ speech$Convention)
Authority_Test
effsize::cohen.d(speech$Authority, speech$Convention)
p[4] <- Authority_Test$p.value

# 5. Purity
describeBy(speech$Purity, speech$Convention)
Purity_Test <- t.test(speech$Purity ~ speech$Convention)
Purity_Test
effsize::cohen.d(speech$Purity, speech$Convention)
p[5] <- Purity_Test$p.value

# With all five p-values collected, apply the Bonferroni
#   adjustment to account for multiple comparisons
p.adjust(p, method = "bonferroni", n = length(p))

# Generate Summary Statistics for Plotting ~~~~~
# This section computes the per-convention summary statistics used
#    to plot each foundation by convention. The computation is the
#    same for every foundation, so it lives in one helper.

# Helper: summary statistics of one foundation score by convention.
#   data      - data frame with a `Convention` column and the
#               composite foundation scores
#   score_col - name of the score column, given as a string
# Returns one row per convention (still grouped by Convention) with
#   mean, sd, n, se, ci (half-width of a t-based 95% interval),
#   max, min, med, and `type` holding the foundation name.
# Missing scores are dropped from every statistic and `n` counts only
#   the non-missing scores, so se and ci are based on the same cases
#   as mean and sd.
summarise_foundation <- function(data, score_col) {
  data %>%
    # We want the statistics per party convention
    group_by(Convention) %>%
    summarize(
      mean = mean(.data[[score_col]], na.rm = TRUE),
      sd   = sd(.data[[score_col]], na.rm = TRUE),
      n    = sum(!is.na(.data[[score_col]])),
      se   = sd / sqrt(n),
      ci   = qt(0.975, df = n - 1) * se,
      max  = max(.data[[score_col]], na.rm = TRUE),
      min  = min(.data[[score_col]], na.rm = TRUE),
      med  = median(.data[[score_col]], na.rm = TRUE),
      .groups = "keep"
    ) %>%
    # Label the rows with the Moral Foundation they describe
    mutate(type = score_col)
}

# One summary table per foundation
Harm      <- summarise_foundation(speech, "Harm")
Fairness  <- summarise_foundation(speech, "Fairness")
Ingroup   <- summarise_foundation(speech, "Ingroup")
Authority <- summarise_foundation(speech, "Authority")
Purity    <- summarise_foundation(speech, "Purity")

# Combine each of the outputs to form one data frame
Conv_08 <- rbind(Harm, Fairness, Ingroup, Authority, Purity)

# Fix the label order of the foundations
Conv_08$type <- factor(
  Conv_08$type,
  levels = c("Harm", "Fairness", "Ingroup", "Authority", "Purity"))

# Grouped bar chart for 2008: foundations on the x axis, one bar per
#   convention (Democrats in blue, Republicans in red), with error
#   bars spanning mean - ci to mean + ci.
ggplot(
  data    = Conv_08,
  mapping = aes(x = type, y = mean, fill = Convention)
) +
  geom_bar(
    stat = "identity", position = position_dodge(), color = "black"
  ) +
  geom_errorbar(
    mapping  = aes(ymin = mean - ci, ymax = mean + ci),
    position = position_dodge(0.9),
    width    = 0.2
  ) +
  labs(
    title   = "Moral Appeals in Political Speeches: 2008",
    x       = "Moral Foundation",
    y       = "Average Percentage of Appeals",
    caption = "Source: 2008 RNC and DNC"
  ) +
  theme_bw() +
  theme(
    text         = element_text(size = 16, colour = "black"),
    axis.title   = element_text(size = 18, colour = "black"),
    title        = element_text(size = 20, colour = "black"),
    plot.caption = element_text(size = 10, colour = "black"),
    axis.text.x  = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    plot.title   = element_text(hjust = 0.5)
  ) +
  scale_fill_manual(
    name   = "Convention",
    values = c(DNC = "#6baed6", RNC = "#fb6a4a")
  )

# Descriptive Statistics ~~~~~

# Per convention: the number of speeches and their combined word
#   count (sum of `WC`, ignoring missing values).
about_08 <- summarise(
  group_by(speech, Convention),
  n_speech = n(),
  w_count  = sum(WC, na.rm = TRUE),
  .groups  = "keep"
)

# Structural Topic Model (STM) ~~~~~

# ***The following section generates structural topic models
#    for the speeches in each convention. Like the analyses
#    in the section above, this section is repeated for each year.
#    As such, I will discuss the analyses in detail in this first
#    appearance; please refer back to it for the
#    later sections. ***

# Democrat Party STM ~~~~~

# 1. Process the data
# Pass the 2008 DNC corpus, together with its metadata, through
#   stm's text processor with these options switched on:
#   lowercasing, stopword removal, number removal, punctuation
#   removal, and stemming.
processed <- textProcessor(
  documents         = dnc08,
  metadata          = data.frame(dnc08_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)
# Prepare the processed documents for modelling
out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)
# Keep each piece of the prepared output in its own object
documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

# 2. Fit a 20-topic model for the Democrat convention, with spectral
#    initialisation and at most 75 EM iterations
DNC2008 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)
# 3. Generate a summary of the output
summary(DNC2008)
# 4. Plot the Results
plot.STM(DNC2008, type = "summary", xlim = c(0, .4))

# Republican Party STM ~~~~~

# Same pipeline as for the Democrats, applied to the 2008 RNC corpus:
#   process the text with its metadata, prepare the documents, fit a
#   20-topic model, then summarise and plot it.

processed <- textProcessor(
  documents         = rnc08,
  metadata          = data.frame(rnc08_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

RNC2008 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(RNC2008)

plot.STM(RNC2008, type = "summary", xlim = c(0, .4))

# *****************************************************************
# 2012 ####
# *****************************************************************

# Democrat Party Analyses ~~~~~
# Score the 2012 DNC corpus with the Moral Foundations Dictionary
#   and attach its metadata, keyed on `Filename` (a copy of the
#   `docname` column from liwcalike()).
DNC <- liwcalike(dnc12, dictionary = data_dictionary_MFD)
DNC[["Filename"]] <- DNC[["docname"]]
DNC <- merge(x = DNC, y = dnc12_meta, by = "Filename")
head(DNC)

# Republican Party Analyses ~~~~~
# Same steps for the 2012 RNC corpus.
RNC <- liwcalike(rnc12, dictionary = data_dictionary_MFD)
RNC[["Filename"]] <- RNC[["docname"]]
RNC <- merge(x = RNC, y = rnc12_meta, by = "Filename")
head(RNC)

# Compare Random Samples ~~~~~
# DNC <- sample_n(DNC, nrow(RNC))

# Prepare Results for Analyses ~~~~~
# Stack both conventions, keep the rows whose Type is "Speech", and
#   add one composite score per foundation (virtue + vice).
speech <- rbind(RNC, DNC) %>%
  filter(Type == "Speech") %>%
  mutate(
    Harm      = care.virtue + care.vice,
    Fairness  = fairness.virtue + fairness.vice,
    Ingroup   = loyalty.virtue + loyalty.vice,
    Authority = authority.virtue + authority.vice,
    Purity    = sanctity.virtue + sanctity.vice
  )

# Validation Selection ~~~~~
# Within each convention, the 5 speeches scoring highest on each
#   foundation.
by_convention <- group_by(speech, Convention)

top5_Harm      <- slice_max(by_convention, order_by = Harm, n = 5)

top5_Fairness  <- slice_max(by_convention, order_by = Fairness, n = 5)

top5_Ingroup   <- slice_max(by_convention, order_by = Ingroup, n = 5)

top5_Authority <- slice_max(by_convention, order_by = Authority, n = 5)

top5_Purity    <- slice_max(by_convention, order_by = Purity, n = 5)

# Analyze Results ~~~~~

# Holds the five t-test p-values for the Bonferroni correction below
p <- numeric(5L)

# For every foundation: summary statistics by convention
#   (psych::describeBy()), a t-test between conventions, Cohen's d,
#   and storage of the t-test p-value.
# cohen.d() is namespace-qualified because both psych and effsize
#   export a function of that name; effsize's version is the one
#   used here, independent of the order the packages were attached.

# 1. Harm
describeBy(speech$Harm, speech$Convention)
Harm_Test <- t.test(speech$Harm ~ speech$Convention)
Harm_Test
effsize::cohen.d(speech$Harm, speech$Convention)
p[1] <- Harm_Test$p.value

# 2. Fairness
describeBy(speech$Fairness, speech$Convention)
Fairness_Test <- t.test(speech$Fairness ~ speech$Convention)
Fairness_Test
effsize::cohen.d(speech$Fairness, speech$Convention)
p[2] <- Fairness_Test$p.value

# 3. Ingroup
describeBy(speech$Ingroup, speech$Convention)
Ingroup_Test <- t.test(speech$Ingroup ~ speech$Convention)
Ingroup_Test
effsize::cohen.d(speech$Ingroup, speech$Convention)
p[3] <- Ingroup_Test$p.value

# 4. Authority
describeBy(speech$Authority, speech$Convention)
Authority_Test <- t.test(speech$Authority ~ speech$Convention)
Authority_Test
effsize::cohen.d(speech$Authority, speech$Convention)
p[4] <- Authority_Test$p.value

# 5. Purity
describeBy(speech$Purity, speech$Convention)
Purity_Test <- t.test(speech$Purity ~ speech$Convention)
Purity_Test
effsize::cohen.d(speech$Purity, speech$Convention)
p[5] <- Purity_Test$p.value

# Bonferroni adjustment across the five comparisons
p.adjust(p, method = "bonferroni", n = length(p))

# Generate Summary Statistics for Plotting ~~~~~~

# Helper: summary statistics of one foundation score by convention.
#   data      - data frame with a `Convention` column and the
#               composite foundation scores
#   score_col - name of the score column, given as a string
# Returns one row per convention (still grouped by Convention) with
#   mean, sd, n, se, ci (half-width of a t-based 95% interval),
#   max, min, med, and `type` holding the foundation name.
# Missing scores are dropped from every statistic and `n` counts only
#   the non-missing scores, so se and ci are based on the same cases
#   as mean and sd.
summarise_foundation <- function(data, score_col) {
  data %>%
    group_by(Convention) %>%
    summarize(
      mean = mean(.data[[score_col]], na.rm = TRUE),
      sd   = sd(.data[[score_col]], na.rm = TRUE),
      n    = sum(!is.na(.data[[score_col]])),
      se   = sd / sqrt(n),
      ci   = qt(0.975, df = n - 1) * se,
      max  = max(.data[[score_col]], na.rm = TRUE),
      min  = min(.data[[score_col]], na.rm = TRUE),
      med  = median(.data[[score_col]], na.rm = TRUE),
      .groups = "keep"
    ) %>%
    mutate(type = score_col)
}

# One summary table per foundation
Harm      <- summarise_foundation(speech, "Harm")
Fairness  <- summarise_foundation(speech, "Fairness")
Ingroup   <- summarise_foundation(speech, "Ingroup")
Authority <- summarise_foundation(speech, "Authority")
Purity    <- summarise_foundation(speech, "Purity")

# Combine each of the outputs
Conv_12 <- rbind(Harm, Fairness, Ingroup, Authority, Purity)

# Fix the label order of the foundations
Conv_12$type <- factor(
  Conv_12$type,
  levels = c("Harm", "Fairness", "Ingroup", "Authority", "Purity"))

# Grouped bar chart for 2012: foundations on the x axis, one bar per
#   convention (Democrats in blue, Republicans in red), with error
#   bars spanning mean - ci to mean + ci.
ggplot(
  data    = Conv_12,
  mapping = aes(x = type, y = mean, fill = Convention)
) +
  geom_bar(
    stat = "identity", position = position_dodge(), color = "black"
  ) +
  geom_errorbar(
    mapping  = aes(ymin = mean - ci, ymax = mean + ci),
    position = position_dodge(0.9),
    width    = 0.2
  ) +
  labs(
    title   = "Moral Appeals in Political Speeches: 2012",
    x       = "Moral Foundation",
    y       = "Average Percentage of Appeals",
    caption = "Source: 2012 RNC and DNC"
  ) +
  theme_bw() +
  theme(
    text         = element_text(size = 16, colour = "black"),
    axis.title   = element_text(size = 18, colour = "black"),
    title        = element_text(size = 20, colour = "black"),
    plot.caption = element_text(size = 10, colour = "black"),
    axis.text.x  = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    plot.title   = element_text(hjust = 0.5)
  ) +
  scale_fill_manual(
    name   = "Convention",
    values = c(DNC = "#6baed6", RNC = "#fb6a4a")
  )

# Descriptive Statistics ~~~~~
# Per convention: number of speeches and combined word count.
about_12 <- summarise(
  group_by(speech, Convention),
  n_speech = n(),
  w_count  = sum(WC, na.rm = TRUE),
  .groups  = "keep"
)

# Structural Topic Model (STM) ~~~~~

# Democrat Party STM ~~~~~

# Process the 2012 DNC corpus with its metadata (lowercasing,
#   stopword/number/punctuation removal, stemming), prepare the
#   documents, then fit, summarise and plot a 20-topic model.
processed <- textProcessor(
  documents         = dnc12,
  metadata          = data.frame(dnc12_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

DNC2012 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(DNC2012)

plot.STM(DNC2012, type = "summary", xlim = c(0, .4))

# Republican Party STM ~~~~~

# Same pipeline for the 2012 RNC corpus and its metadata.
processed <- textProcessor(
  documents         = rnc12,
  metadata          = data.frame(rnc12_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

RNC2012 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(RNC2012)

plot.STM(RNC2012, type = "summary", xlim = c(0, .4))


# *****************************************************************
# 2016 ####
# *****************************************************************

# Democrat Party Analyses ~~~~~

# Score the 2016 DNC corpus with the Moral Foundations Dictionary
#   and attach its metadata, keyed on `Filename` (a copy of the
#   `docname` column from liwcalike()).
DNC <- liwcalike(dnc16, dictionary = data_dictionary_MFD)
DNC[["Filename"]] <- DNC[["docname"]]
DNC <- merge(x = DNC, y = dnc16_meta, by = "Filename")
head(DNC)

# Republican Party Analyses ~~~~~

# Same steps for the 2016 RNC corpus.
RNC <- liwcalike(rnc16, dictionary = data_dictionary_MFD)
RNC[["Filename"]] <- RNC[["docname"]]
RNC <- merge(x = RNC, y = rnc16_meta, by = "Filename")
head(RNC)

# Compare Random Samples ~~~~
# DNC <- sample_n(DNC, nrow(RNC))

# Prepare Results for Analyses ~~~~~

# Stack both conventions and keep only the speeches.
# The Type label is compared case-insensitively: the 2008 and 2012
#   sections filter on "Speech" while this section used "speech", and
#   an exact match silently returns zero rows if the metadata uses
#   the other capitalisation.
# NOTE(review): confirm which capitalisation the 2016 metadata uses.
speech <- rbind(RNC, DNC) %>%
  filter(tolower(Type) == "speech")

# Composite score per foundation: virtue + vice appeals
speech <- speech %>%
  mutate(
    Harm      = care.virtue + care.vice,
    Fairness  = fairness.virtue + fairness.vice,
    Ingroup   = loyalty.virtue + loyalty.vice,
    Authority = authority.virtue + authority.vice,
    Purity    = sanctity.virtue + sanctity.vice
  )

# Validation Selection ~~~~~

# Within each convention, the 5 speeches scoring highest on each
#   foundation.
by_convention <- group_by(speech, Convention)

top5_Harm      <- slice_max(by_convention, order_by = Harm, n = 5)

top5_Fairness  <- slice_max(by_convention, order_by = Fairness, n = 5)

top5_Ingroup   <- slice_max(by_convention, order_by = Ingroup, n = 5)

top5_Authority <- slice_max(by_convention, order_by = Authority, n = 5)

top5_Purity    <- slice_max(by_convention, order_by = Purity, n = 5)


# Analyse Results ~~~~~~

# Holds the five t-test p-values for the Bonferroni correction below
p <- numeric(5L)

# For every foundation: summary statistics by convention
#   (psych::describeBy()), a t-test between conventions, Cohen's d,
#   and storage of the t-test p-value.
# cohen.d() is namespace-qualified because both psych and effsize
#   export a function of that name; effsize's version is the one
#   used here, independent of the order the packages were attached.

# 1. Harm
describeBy(speech$Harm, speech$Convention)
Harm_Test <- t.test(speech$Harm ~ speech$Convention)
Harm_Test
effsize::cohen.d(speech$Harm, speech$Convention)
p[1] <- Harm_Test$p.value

# 2. Fairness
describeBy(speech$Fairness, speech$Convention)
Fairness_Test <- t.test(speech$Fairness ~ speech$Convention)
Fairness_Test
effsize::cohen.d(speech$Fairness, speech$Convention)
p[2] <- Fairness_Test$p.value

# 3. Ingroup
describeBy(speech$Ingroup, speech$Convention)
Ingroup_Test <- t.test(speech$Ingroup ~ speech$Convention)
Ingroup_Test
effsize::cohen.d(speech$Ingroup, speech$Convention)
p[3] <- Ingroup_Test$p.value

# 4. Authority
describeBy(speech$Authority, speech$Convention)
Authority_Test <- t.test(speech$Authority ~ speech$Convention)
Authority_Test
effsize::cohen.d(speech$Authority, speech$Convention)
p[4] <- Authority_Test$p.value

# 5. Purity
describeBy(speech$Purity, speech$Convention)
Purity_Test <- t.test(speech$Purity ~ speech$Convention)
Purity_Test
effsize::cohen.d(speech$Purity, speech$Convention)
p[5] <- Purity_Test$p.value

# Bonferroni adjustment across the five comparisons
p.adjust(p, method = "bonferroni", n = length(p))

# Prepare Results for Plotting ~~~~~

# Helper: summary statistics of one foundation score by convention.
#   data      - data frame with a `Convention` column and the
#               composite foundation scores
#   score_col - name of the score column, given as a string
# Returns one row per convention (still grouped by Convention) with
#   mean, sd, n, se, ci (half-width of a t-based 95% interval),
#   max, min, med, and `type` holding the foundation name.
# Missing scores are dropped from every statistic and `n` counts only
#   the non-missing scores, so se and ci are based on the same cases
#   as mean and sd.
summarise_foundation <- function(data, score_col) {
  data %>%
    group_by(Convention) %>%
    summarize(
      mean = mean(.data[[score_col]], na.rm = TRUE),
      sd   = sd(.data[[score_col]], na.rm = TRUE),
      n    = sum(!is.na(.data[[score_col]])),
      se   = sd / sqrt(n),
      ci   = qt(0.975, df = n - 1) * se,
      max  = max(.data[[score_col]], na.rm = TRUE),
      min  = min(.data[[score_col]], na.rm = TRUE),
      med  = median(.data[[score_col]], na.rm = TRUE),
      .groups = "keep"
    ) %>%
    mutate(type = score_col)
}

# One summary table per foundation
Harm      <- summarise_foundation(speech, "Harm")
Fairness  <- summarise_foundation(speech, "Fairness")
Ingroup   <- summarise_foundation(speech, "Ingroup")
Authority <- summarise_foundation(speech, "Authority")
Purity    <- summarise_foundation(speech, "Purity")

# Combine each of the outputs
Conv_16 <- rbind(Harm, Fairness, Ingroup, Authority, Purity)

# Fix the label order of the foundations
Conv_16$type <- factor(
  Conv_16$type,
  levels = c("Harm", "Fairness", "Ingroup", "Authority", "Purity"))

# Grouped bar chart for 2016: foundations on the x axis, one bar per
#   convention (Democrats in blue, Republicans in red), with error
#   bars spanning mean - ci to mean + ci.
ggplot(
  data    = Conv_16,
  mapping = aes(x = type, y = mean, fill = Convention)
) +
  geom_bar(
    stat = "identity", position = position_dodge(), color = "black"
  ) +
  geom_errorbar(
    mapping  = aes(ymin = mean - ci, ymax = mean + ci),
    position = position_dodge(0.9),
    width    = 0.2
  ) +
  labs(
    title   = "Moral Appeals in Political Speeches: 2016",
    x       = "Moral Foundation",
    y       = "Average Percentage of Appeals",
    caption = "Source: 2016 RNC and DNC"
  ) +
  theme_bw() +
  theme(
    text         = element_text(size = 16, colour = "black"),
    axis.title   = element_text(size = 18, colour = "black"),
    title        = element_text(size = 20, colour = "black"),
    plot.caption = element_text(size = 10, colour = "black"),
    axis.text.x  = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    plot.title   = element_text(hjust = 0.5)
  ) +
  scale_fill_manual(
    name   = "Convention",
    values = c(DNC = "#6baed6", RNC = "#fb6a4a")
  )

# Descriptive Statistics ~~~~~
# Per convention: number of speeches and combined word count.
about_16 <- summarise(
  group_by(speech, Convention),
  n_speech = n(),
  w_count  = sum(WC, na.rm = TRUE),
  .groups  = "keep"
)

# Structural Topic Models (STMs) ~~~~~

# Democrat Party STM ~~~~~

# Process the 2016 DNC corpus with its metadata (lowercasing,
#   stopword/number/punctuation removal, stemming), prepare the
#   documents, then fit, summarise and plot a 20-topic model.
processed <- textProcessor(
  documents         = dnc16,
  metadata          = data.frame(dnc16_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

DNC2016 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(DNC2016)

plot.STM(DNC2016, type = "summary", xlim = c(0, .4))

# Republican Party STM ~~~~~

# Same pipeline for the 2016 RNC corpus and its metadata.
processed <- textProcessor(
  documents         = rnc16,
  metadata          = data.frame(rnc16_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

RNC2016 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(RNC2016)

plot.STM(RNC2016, type = "summary", xlim = c(0, .4))


# *****************************************************************
# 2020 ####
# *****************************************************************

# Democrat Party Analysis ~~~~~

# Score the `text` column of the 2020 DNC data with the Moral
#   Foundations Dictionary and bind the scores column-wise to the
#   original data (no key-based merge is used for 2020).
DNC <- cbind(
  liwcalike(dnc20$text, dictionary = data_dictionary_MFD),
  dnc20
)
head(DNC)

# Republican Party Analysis ~~~~~

# Same steps for the 2020 RNC data.
RNC <- cbind(
  liwcalike(rnc20$text, dictionary = data_dictionary_MFD),
  rnc20
)
head(RNC)

# Compare Random Samples ~~~~~
# DNC <- sample_n(DNC, nrow(RNC))

# Prepare Results for Analyses ~~~~~~

# Stack both conventions (no filter on Type is applied for 2020) and
#   add one composite score per foundation (virtue + vice).
speech <- mutate(
  rbind(RNC, DNC),
  Harm      = care.virtue + care.vice,
  Fairness  = fairness.virtue + fairness.vice,
  Ingroup   = loyalty.virtue + loyalty.vice,
  Authority = authority.virtue + authority.vice,
  Purity    = sanctity.virtue + sanctity.vice
)

# Analyze Results ~~~~~

# Holds the five t-test p-values for the Bonferroni correction below
p <- numeric(5L)

# For every foundation: summary statistics by convention
#   (psych::describeBy()), a t-test between conventions, Cohen's d,
#   and storage of the t-test p-value.
# cohen.d() is namespace-qualified because both psych and effsize
#   export a function of that name; effsize's version is the one
#   used here, independent of the order the packages were attached.

# 1. Harm
describeBy(speech$Harm, speech$Convention)
Harm_Test <- t.test(speech$Harm ~ speech$Convention)
Harm_Test
effsize::cohen.d(speech$Harm, speech$Convention)
p[1] <- Harm_Test$p.value

# 2. Fairness
describeBy(speech$Fairness, speech$Convention)
Fairness_Test <- t.test(speech$Fairness ~ speech$Convention)
Fairness_Test
effsize::cohen.d(speech$Fairness, speech$Convention)
p[2] <- Fairness_Test$p.value

# 3. Ingroup
describeBy(speech$Ingroup, speech$Convention)
Ingroup_Test <- t.test(speech$Ingroup ~ speech$Convention)
Ingroup_Test
effsize::cohen.d(speech$Ingroup, speech$Convention)
p[3] <- Ingroup_Test$p.value

# 4. Authority
describeBy(speech$Authority, speech$Convention)
Authority_Test <- t.test(speech$Authority ~ speech$Convention)
Authority_Test
effsize::cohen.d(speech$Authority, speech$Convention)
p[4] <- Authority_Test$p.value

# 5. Purity
describeBy(speech$Purity, speech$Convention)
Purity_Test <- t.test(speech$Purity ~ speech$Convention)
Purity_Test
effsize::cohen.d(speech$Purity, speech$Convention)
p[5] <- Purity_Test$p.value

# Bonferroni adjustment across the five comparisons
p.adjust(p, method = "bonferroni", n = length(p))

# Prepare Results for Plotting ~~~~~

# Helper: summary statistics of one foundation score by convention.
#   data      - data frame with a `Convention` column and the
#               composite foundation scores
#   score_col - name of the score column, given as a string
# Returns one row per convention (still grouped by Convention) with
#   mean, sd, n, se, ci (half-width of a t-based 95% interval),
#   max, min, med, and `type` holding the foundation name.
# Missing scores are dropped from every statistic and `n` counts only
#   the non-missing scores, so se and ci are based on the same cases
#   as mean and sd.
summarise_foundation <- function(data, score_col) {
  data %>%
    group_by(Convention) %>%
    summarize(
      mean = mean(.data[[score_col]], na.rm = TRUE),
      sd   = sd(.data[[score_col]], na.rm = TRUE),
      n    = sum(!is.na(.data[[score_col]])),
      se   = sd / sqrt(n),
      ci   = qt(0.975, df = n - 1) * se,
      max  = max(.data[[score_col]], na.rm = TRUE),
      min  = min(.data[[score_col]], na.rm = TRUE),
      med  = median(.data[[score_col]], na.rm = TRUE),
      .groups = "keep"
    ) %>%
    mutate(type = score_col)
}

# One summary table per foundation
Harm      <- summarise_foundation(speech, "Harm")
Fairness  <- summarise_foundation(speech, "Fairness")
Ingroup   <- summarise_foundation(speech, "Ingroup")
Authority <- summarise_foundation(speech, "Authority")
Purity    <- summarise_foundation(speech, "Purity")

# Combine each of the outputs
Conv_20 <- rbind(Harm, Fairness, Ingroup, Authority, Purity)

# Fix the label order of the foundations
Conv_20$type <- factor(
  Conv_20$type,
  levels = c("Harm", "Fairness", "Ingroup", "Authority", "Purity"))

# Grouped bar chart for 2020: foundations on the x axis, one bar per
#   convention (Democrats in blue, Republicans in red), with error
#   bars spanning mean - ci to mean + ci.
ggplot(
  data    = Conv_20,
  mapping = aes(x = type, y = mean, fill = Convention)
) +
  geom_bar(
    stat = "identity", position = position_dodge(), color = "black"
  ) +
  geom_errorbar(
    mapping  = aes(ymin = mean - ci, ymax = mean + ci),
    position = position_dodge(0.9),
    width    = 0.2
  ) +
  labs(
    title   = "Moral Appeals in Political Speeches: 2020",
    x       = "Moral Foundation",
    y       = "Average Percentage of Appeals",
    caption = "Source: 2020 RNC and DNC"
  ) +
  theme_bw() +
  theme(
    text         = element_text(size = 16, colour = "black"),
    axis.title   = element_text(size = 18, colour = "black"),
    title        = element_text(size = 20, colour = "black"),
    plot.caption = element_text(size = 10, colour = "black"),
    axis.text.x  = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    plot.title   = element_text(hjust = 0.5)
  ) +
  scale_fill_manual(
    name   = "Convention",
    values = c(DNC = "#6baed6", RNC = "#fb6a4a")
  )

# Descriptive Statistics ~~~~~
# Per convention: number of speeches and combined word count.
about_20 <- summarise(
  group_by(speech, Convention),
  n_speech = n(),
  w_count  = sum(WC, na.rm = TRUE),
  .groups  = "keep"
)

# Structural Topic Models (STMs) ~~~~~

# Democrat Party STM ~~~~~

# Process `dnc20_select` with its metadata (lowercasing,
#   stopword/number/punctuation removal, stemming), prepare the
#   documents, then fit, summarise and plot a 20-topic model.
processed <- textProcessor(
  documents         = dnc20_select,
  metadata          = data.frame(dnc20_select_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

DNC2020 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(DNC2020)

plot.STM(DNC2020, type = "summary", xlim = c(0, .4))

# Republican Party STM ~~~~~

# Same pipeline for `rnc20_select` and its metadata.
processed <- textProcessor(
  documents         = rnc20_select,
  metadata          = data.frame(rnc20_select_meta),
  lowercase         = TRUE,
  removestopwords   = TRUE,
  removenumbers     = TRUE,
  removepunctuation = TRUE,
  stem              = TRUE
)

out <- prepDocuments(
  documents = processed$documents,
  vocab     = processed$vocab,
  meta      = processed$meta
)

documents <- out$documents
vocab     <- out$vocab
meta      <- out$meta

RNC2020 <- stm(
  documents  = documents,
  vocab      = vocab,
  K          = 20,
  data       = out$meta,
  init.type  = "Spectral",
  max.em.its = 75
)

summary(RNC2020)

plot.STM(RNC2020, type = "summary", xlim = c(0, .4))
